################################################################################
# Master Script for JHR Replication Package
# "Teacher Testing Standards and the New Teacher Pipeline"
# Law, Marks, and Stern
#
# This script runs all analyses to reproduce Tables and Figures
################################################################################

# Clear environment: drop every non-hidden object in the workspace, then
# run the garbage collector.
rm(list = ls())
gc()

# Set working directory to Clean folder
# IMPORTANT: Change this path to the location of the Clean/ folder on your system
clean_dir <- "~/path/to/Clean"

# Fail early with an actionable message when the placeholder path above has
# not been edited (or points somewhere that does not exist), instead of the
# generic "cannot change working directory" error raised by setwd().
if (!dir.exists(clean_dir)) {
  stop(
    "Clean/ folder not found at '", clean_dir, "'. ",
    "Edit the path assigned to `clean_dir` in this master script so that it ",
    "points to the Clean/ folder on your system.",
    call. = FALSE
  )
}
setwd(clean_dir)

# ──────────────────────────────────────────────────────────────────────────────
# Required Packages
# ──────────────────────────────────────────────────────────────────────────────
# Packages the replication run depends on, grouped by purpose. The groups are
# flattened into one unnamed character vector, in the order listed here.
required_packages <- unlist(
  list(
    data_manipulation = c(
      "tidyverse", "readxl", "writexl", "haven", "data.table"
    ),
    # Fixed effects regressions (Stata reghdfe equivalent)
    fixed_effects = "fixest",
    tables_and_output = c(
      "modelsummary", "kableExtra", "stargazer", "flextable", "officer"
    ),
    visualization = c(
      "ggplot2", "ggrepel", "patchwork", "viridis", "scales"
    ),
    other = c("glue", "purrr", "broom", "xml2")
  ),
  use.names = FALSE
)

# Install missing packages
# A package counts as installed when system.file() can locate its directory
# (it returns "" otherwise). This replaces a lookup in installed.packages(),
# which reads metadata for every installed package and which its own help
# page advises against for checking whether a named package is installed.
cat("Checking for required packages...\n")
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
new_packages <- required_packages[!is_installed]
if (length(new_packages) > 0) {
  cat("  Installing missing packages:", paste(new_packages, collapse = ", "), "\n")

  # In a non-interactive session (e.g. Rscript) R cannot prompt for a CRAN
  # mirror, so install.packages() fails while the CRAN entry of the "repos"
  # option is still the "@CRAN@" placeholder. Supply a mirror only in that
  # case; any repositories the user has configured are passed through as-is.
  install_repos <- getOption("repos")
  if (is.null(install_repos)) {
    install_repos <- c(CRAN = "https://cloud.r-project.org")
  } else if (identical(unname(install_repos["CRAN"]), "@CRAN@")) {
    install_repos["CRAN"] <- "https://cloud.r-project.org"
  }

  install.packages(new_packages, repos = install_repos, dependencies = TRUE)
} else {
  cat("  All required packages are installed.\n")
}

# Attach each required package in turn. The first package that cannot be
# loaded aborts the run with an error naming that package and carrying the
# underlying error message.
cat("Loading packages...\n")
for (pkg in required_packages) {
  tryCatch(
    library(pkg, character.only = TRUE),
    error = function(e) {
      stop(sprintf(
        "Failed to load package: %s \n  Error: %s",
        pkg,
        conditionMessage(e)
      ))
    }
  )
}
cat("  All packages loaded successfully.\n")

# ──────────────────────────────────────────────────────────────────────────────
# Helper function to source scripts with error handling
# ──────────────────────────────────────────────────────────────────────────────
# Run one pipeline script and report whether it succeeded.
#
# Prints a banner containing `step_name`, then source()s `script_path` with
# source()'s default arguments (so the script is evaluated in the global
# environment).
#
# Arguments:
#   script_path  Path to the R script to run.
#   step_name    Human-readable label printed in the banner and status lines.
#
# Returns:
#   TRUE  if the script was sourced without raising an error.
#   FALSE if the file does not exist (a warning is issued) or if sourcing it
#         raised an error (the error message is printed, not re-thrown).
source_with_status <- function(script_path, step_name) {
  rule <- strrep("-", 60)
  cat("\n", rule, "\n", sep = "")
  cat(step_name, "\n")
  cat(rule, "\n", sep = "")

  # Missing script: warn and skip. `immediate. = TRUE` prints the warning
  # right under this step's banner even when the master script is itself run
  # through source(), where warnings are otherwise held back until the very
  # end of the run. `call. = FALSE` keeps the internal call out of the message.
  if (!file.exists(script_path)) {
    warning(
      "Script not found: ", script_path, " - Skipping this step.",
      call. = FALSE,
      immediate. = TRUE
    )
    return(FALSE)
  }

  # Source the script; an error is reported on the console and converted to
  # FALSE so the remaining steps of the pipeline still get a chance to run.
  tryCatch(
    {
      source(script_path)
      cat("  SUCCESS: ", step_name, " completed.\n", sep = "")
      TRUE
    },
    error = function(e) {
      cat("  ERROR in ", script_path, ":\n", sep = "")
      cat("    ", conditionMessage(e), "\n", sep = "")
      FALSE
    }
  )
}

# ──────────────────────────────────────────────────────────────────────────────
# Run Scripts in Order
# ──────────────────────────────────────────────────────────────────────────────

# Opening banner for the run
cat(
  "\n",
  "========================================\n",
  "Starting JHR Replication Package\n",
  "========================================\n",
  sep = ""
)

# One logical per step (TRUE = completed), filled in as each script runs.
# Every step is attempted, whether or not an earlier one failed.
results <- list()

# Step 1: Clean ETS/Praxis Data and Create TDI
results[["step1"]] <- source_with_status(
  script_path = "code/01_clean_ets_data.R",
  step_name = "Step 1: Cleaning ETS data and creating Test Difficulty Index"
)

# Step 2: Clean IPEDS Data
results[["step2"]] <- source_with_status(
  script_path = "code/02_clean_ipeds_data.R",
  step_name = "Step 2: Cleaning IPEDS enrollment and graduation data"
)

# Step 3: Merge ETS Treatment + IPEDS Data
# NOTE(review): the file prefixes of Steps 3 and 4 (04_, 03_) are in the
# opposite order to the step numbers - confirm this run order is intended.
results[["step3"]] <- source_with_status(
  script_path = "code/04_merge_event_data.R",
  step_name = "Step 3: Merging ETS treatment and IPEDS data"
)

# Step 4: Create Descriptive Tables
results[["step4"]] <- source_with_status(
  script_path = "code/03_create_descriptive_tables.R",
  step_name = "Step 4: Creating descriptive tables (Tables 1-3)"
)

# Step 5: Run Secondary Regressions (Placebo, Licenses, Shortages)
results[["step5"]] <- source_with_status(
  script_path = "code/05_secondary_regressions.R",
  step_name = "Step 5: Running secondary regressions (Figures 5-7)"
)

# Step 6: Run Main Regressions
results[["step6"]] <- source_with_status(
  script_path = "code/06_main_regressions.R",
  step_name = "Step 6: Running main regressions (Tables 4-7, A1-A2)"
)

# Step 7: Generate All Figures
results[["step7"]] <- source_with_status(
  script_path = "code/07_figures.R",
  step_name = "Step 7: Generating all figures"
)


# ──────────────────────────────────────────────────────────────────────────────
# Summary
# ──────────────────────────────────────────────────────────────────────────────

# Summary banner
cat(
  "\n",
  "========================================\n",
  "Replication Summary\n",
  "========================================\n\n",
  sep = ""
)

# Named logical vector: display label -> outcome recorded for that step
steps <- c(
  "Step 1: Clean ETS data" = results[["step1"]],
  "Step 2: Clean IPEDS data" = results[["step2"]],
  "Step 3: Merge event data" = results[["step3"]],
  "Step 4: Descriptive tables" = results[["step4"]],
  "Step 5: Secondary regressions" = results[["step5"]],
  "Step 6: Main regressions" = results[["step6"]],
  "Step 7: Generate figures" = results[["step7"]]
)

# One status line per step; anything other than a literal TRUE is reported
# as skipped or failed
for (step_name in names(steps)) {
  if (isTRUE(steps[[step_name]])) {
    status <- "COMPLETED"
  } else {
    status <- "SKIPPED/FAILED"
  }
  cat("  ", step_name, ": ", status, "\n", sep = "")
}

# Tally of completed steps against the total
n_total <- length(steps)
n_success <- sum(vapply(steps, isTRUE, logical(1)))

cat("\n")
if (n_success != n_total) {
  cat(n_success, " of ", n_total, " steps completed. Check warnings above.\n", sep = "")
} else {
  cat("All ", n_total, " steps completed successfully!\n", sep = "")
}

# Where the sourced scripts are expected to have written their results
cat(
  "\nOutputs generated:\n",
  "  - Figures: output/figures/\n",
  "  - Tables: output/tables/\n",
  "\n",
  sep = ""
)
